# Keep only the iris columns whose names contain a literal dot.
dot_cols <- iris %>% select(contains("."))
# Inspect the selected column names. The original asked for `dat_cols`, a
# misspelling of the object created on the previous line.
colnames(dot_cols)
# Preview the first rows of the raw data.
head(iris)
# Rows belonging to the setosa species only.
setosa_df <- iris %>% filter(Species == "setosa")
# Flowers whose sepal area exceeds the average. `Sepal.Area` is not a column
# of iris, so it has to be derived before filter() can refer to it.
large_sepals <- iris %>%
  mutate(Sepal.Area = Sepal.Length * Sepal.Width) %>%
  filter(Sepal.Area > mean(Sepal.Area))
# Derive sepal and petal areas, then keep only the flowers that are above
# average on both measures (conditions given to one filter() are AND-ed).
large_irises <- iris %>%
  mutate(
    Sepal.Area = Sepal.Length * Sepal.Width,
    Petal.Area = Petal.Length * Petal.Width
  ) %>%
  filter(
    Sepal.Area > mean(Sepal.Area),
    Petal.Area > mean(Petal.Area)
  )
head(large_irises)
# Above-average flowers on both area measures, sorted from smallest to
# largest sepal area with petal area breaking ties.
large_irises <- iris %>%
  mutate(Sepal.Area = Sepal.Length * Sepal.Width) %>%
  mutate(Petal.Area = Petal.Length * Petal.Width) %>%
  filter(Petal.Area > mean(Petal.Area) & Sepal.Area > mean(Sepal.Area)) %>%
  arrange(Sepal.Area, Petal.Area)
head(large_irises)
# Above-average flowers on both area measures, ordered from largest to
# smallest: desc() reverses the sort, and dropping it gives ascending order.
large_irises <- iris %>%
  mutate(
    Sepal.Area = Sepal.Length * Sepal.Width,
    Petal.Area = Petal.Length * Petal.Width
  ) %>%
  filter(Sepal.Area > mean(Sepal.Area), Petal.Area > mean(Petal.Area)) %>%
  arrange(desc(Sepal.Area), desc(Petal.Area))
head(large_irises)
# One-row overview of the whole data set: number of observations, number of
# distinct species, and the mean sepal and petal areas.
iris_summary <- iris %>%
  mutate(
    Sepal.Area = Sepal.Length * Sepal.Width,
    Petal.Area = Petal.Length * Petal.Width
  ) %>%
  summarize(
    n_obs = n(),
    n_species = n_distinct(Species),
    average_sepal_area = mean(Sepal.Area),
    average_petal_area = mean(Petal.Area)
  )
iris_summary
# Per-species overview: observation count plus mean sepal and petal areas,
# one row for each level of Species.
iris_summary <- iris %>%
  mutate(
    Sepal.Area = Sepal.Length * Sepal.Width,
    Petal.Area = Petal.Length * Petal.Width
  ) %>%
  group_by(Species) %>%
  summarize(
    n_obs = n(),
    average_sepal_area = mean(Sepal.Area),
    average_petal_area = mean(Petal.Area)
  )
iris_summary
# Mark the rows of iris as grouped by species and print the result.
iris_subsetted <- group_by(iris, Species)
iris_subsetted
# Number of observations recorded for each species.
table(iris[["Species"]])
# Split iris into a list holding one data frame per species.
# filter() needs the comparison operator `==`; the first attempt wrote
# `Species = "versicolor"`, which is a named argument, not a condition.
iris_subsetted <- list(
  filter(iris, Species == "setosa"),
  filter(iris, Species == "versicolor"),
  filter(iris, Species == "virginica")
)
str(iris_subsetted)
# Rows per subset, first as a list and then flattened into a vector.
lapply(iris_subsetted, nrow)
unlist(lapply(iris_subsetted, nrow))
# purrr has to be attached before reduce() (or its help page) can be found;
# the original called reduce() first and loaded the package afterwards.
library(purrr)
?bind_rows
?rbind
?reduce
# Stack the per-species data frames back into one data frame.
iris_whole <- bind_rows(iris_subsetted)
# Same stacking, expressed as a fold of bind_rows() over the list.
iris_whole_purr <- iris_subsetted %>% reduce(bind_rows)
# Cell-by-cell comparison of the two results.
table(iris_whole == iris_whole_purr)
# A vector of standard-normal noise, one value per row of iris_whole.
new_var <- rnorm(nrow(iris_whole), 0, 1)
# Two simulated columns, one value per row of iris. Each column needs its own
# name: the first attempt used `var1` twice, which tibble() rejects.
new_vars <- tibble(
  variable_1 = rnorm(nrow(iris), 0, 1),
  variable_2 = rnorm(nrow(iris), 1, 1)
)
# Append the simulated columns to iris (this masks the built-in data set with
# a modified copy in the workspace).
iris <- bind_cols(iris, new_vars)
# Two keyed tables for the join examples: tb1 covers all 26 letters, tb2 only
# the first ten. `key` exists only as a column, so it cannot be printed on
# its own; the number of draws is given explicitly as length(key).
tb1 <- tibble(
  key = letters,
  variable1 = rnorm(length(key), 0, 1),
  variable2 = rnorm(length(key), 1, 1)
)
tb2 <- tibble(
  key = letters[1:10],
  variable3 = rnorm(length(key), 2, 1)
)
# Join on `key`, keeping the rows of the left table (tb1).
left_jointed <- left_join(tb1, tb2, by = "key")
head(left_jointed)
nrow(left_jointed)
# Join on `key`, keeping the rows of the right table (tb2).
right_joined <- right_join(tb1, tb2, by = "key")
head(right_joined)
nrow(right_joined)
# Fresh tables with partially overlapping keys; tb2 contains "X" twice.
tb1 <- tibble(
  key = c("A", "B", "C", "D", "E"),
  variable1 = rnorm(length(key), 0, 1),
  variable2 = rnorm(length(key), 1, 1)
)
tb2 <- tibble(
  key = c("B", "C", "D", "X", "Y", "X"),
  variable3 = rnorm(length(key), 2, 1)
)
# Inner join. (The original also printed `left_joined`, an object that is
# never created.)
inner_joined <- inner_join(tb1, tb2, by = "key")
head(inner_joined)
nrow(inner_joined)
inner_joined$key
# Full join.
full_joined <- full_join(tb1, tb2, by = "key")
head(full_joined)
nrow(full_joined)
full_joined$key
# Semi join, printed next to the inner join for comparison.
semijoined <- semi_join(tb1, tb2, by = "key")
semijoined
inner_joined
# Anti join; the result has to be created before it can be printed.
antijoined <- anti_join(tb1, tb2, by = "key")
antijoined
tb1
tb2
# Simulated prices for three stocks (X, Y, Z) on ten consecutive days,
# starting 2009-01-01. The seed makes the draws reproducible.
set.seed(1234)
stocks <- local({
  n_days <- 10
  data.frame(
    time = as.Date("2009-01-01") + (seq_len(n_days) - 1L),
    X = rnorm(n = n_days, mean = 100, sd = 10),
    Y = rnorm(n = n_days, mean = 125, sd = 20),
    Z = rnorm(n = n_days, mean = 150, sd = 4)
  )
})
# Long format: every column except `time` is stacked into a `stock` (name)
# and `price` (value) pair.
stocks_long <- gather(stocks, key = stock, value = price, -time)
head(stocks_long)
# Back to wide format: one column per stock again.
stocks_wide <- spread(stocks_long, key = stock, value = price)
head(stocks_wide)
# Check that the round trip reproduces the original rows.
setequal(stocks, stocks_wide)
# Two simulated columns, one value per row of iris, appended to the data.
new_vars <- tibble(
  variable_1 = rnorm(n = nrow(iris), mean = 0, sd = 1),
  variable_2 = rnorm(n = nrow(iris), mean = 1, sd = 1)
)
iris <- bind_cols(iris, new_vars)
# Subset into a list with one data frame per species
iris_subsetted <- lapply(
  c("setosa", "versicolor", "virginica"),
  function(sp) filter(iris, Species == sp)
)
# Number of rows in each of the data frames
vapply(iris_subsetted, nrow, integer(1))
# Bind the rows back together
iris_whole <- bind_rows(iris_subsetted)
# purrr can also repeat a step over every element of a list; reduce() folds
# bind_rows() across the list, which is useful when merging or joining
# several data frames together
library(purrr)
iris_whole_purr <- reduce(iris_subsetted, bind_rows)
# Both approaches give the same rows
setequal(iris_whole, iris_whole_purr)
# The MySQL source needs the RMySQL driver. Install it only when missing and
# attach it before connecting; the original tried to connect first.
if (!requireNamespace("RMySQL", quietly = TRUE)) {
  install.packages("RMySQL")
}
library("RMySQL")
# Connect to the remote course database.
# NOTE(review): the credentials are hard-coded in the script; confirm they are
# meant to be public before sharing this file.
my_db <- src_mysql(
  dbname = "dplyr",
  host = "courses.csrrinzqubik.us-east-1.rds.amazonaws.com",
  port = 3306,
  user = "student",
  password = "datacamp"
)
# Reference the remote table called "dplyr" and look at its columns.
nycflights <- tbl(my_db, "dplyr")
glimpse(nycflights)
# Flights and mean arrival delay per carrier, smallest delay first.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
flight_summaries <- nycflights %>%
  group_by(carrier) %>%
  summarise(n_flights = n(), avg_delay = mean(arr_delay, na.rm = TRUE)) %>%
  arrange(avg_delay)
head(flight_summaries)
# Start from an empty workspace.
rm(list = ls())
# Load the sentences data and keep it under a shorter alias.
load(file = "new_sentences_master_09292018.Rdata")
dat <- new_sentences_master
# Load the saved model objects (presumably ergm.89 and fergm.89, which are
# used right below) and run the packaged comparison once.
load(file = "~/Desktop/89.RData")
load(file = "~/Desktop/fergm.89.RData")
library(fergm)
p <- compare_predictions(ergm.89, fergm.89)
load(file = "~/Desktop/89.RData")
load(file = "~/Desktop/fergm.89.RData")
library(fergm)
# Show the source of the packaged function, then mirror its arguments as
# workspace variables so its body can be stepped through by hand.
print(compare_predictions)
ergm.fit <- ergm.89
fergm.fit <- fergm.89
n_dyads <- choose(n = ergm.fit$network$gal$n, k = 2)
print(ergm.fit$network)
print(as.matrix(ergm.fit$network))
# Look for a function called `lt`: help page, object itself, full-text search.
help("lt")
print(lt)
help.search("lt")
# Values below the diagonal of a matrix. Defined here because ergm.pred()
# calls it and no definition exists at this point of the session.
lt <- function(m) {
  m[lower.tri(m)]
}
# Number of simulated networks. It has to exist before replicate() uses it;
# the original assigned it only after the first replicate() call.
replications <- 10
# Number of unordered node pairs in the fitted network.
n_dyads <- choose(ergm.fit$network$gal$n, 2)
# Share of dyads on which one network simulated from the fitted ERGM agrees
# with the observed network. Reads ergm.fit, lt and n_dyads from the
# workspace.
ergm.pred <- function() {
  flo.truth <- lt(as.matrix(ergm.fit$network))
  sim.pred <- lt(as.matrix(simulate.ergm(ergm.fit)))
  sum(flo.truth == sim.pred) / n_dyads
}
# One agreement share per simulated network.
pct_correct_ergm <- replicate(replications, ergm.pred())
library(statnet)
# Values below the diagonal of a matrix. Moved ahead of its first use: the
# original called lt() one line before defining it.
lt <- function(m) {
  m[lower.tri(m)]
}
# Observed ties, one entry per below-diagonal cell of the adjacency matrix.
flo.truth <- lt(as.matrix(ergm.fit$network))
# Share of dyads on which one network simulated from the fitted ERGM agrees
# with the observed network. Reads ergm.fit, lt and n_dyads from the
# workspace.
ergm.pred <- function() {
  flo.truth <- lt(as.matrix(ergm.fit$network))
  sim.pred <- lt(as.matrix(simulate.ergm(ergm.fit)))
  sum(flo.truth == sim.pred) / n_dyads
}
# One agreement share per simulated network.
pct_correct_ergm <- replicate(replications, ergm.pred())
# Interactive probes of how to simulate from the fitted ERGM. The calls are
# kept in their original order because each simulation draws from the RNG.

# Observed ties: below-diagonal cells of the fitted network's adjacency matrix.
flo.truth <- lt(as.matrix(ergm.fit$network))
flo.truth
# Try drawing one network from the fitted model, then consult the help page.
simulate.ergm(ergm.fit)
?simulate.ergm
# Inspect the stored model formula and network.
ergm.fit$formula
ergm.fit$network
# Share of dyads on which one simulated network agrees with the observed one.
# Reads ergm.fit, lt and n_dyads from the workspace.
ergm.pred <- function() {
flo.truth <- lt(as.matrix(ergm.fit$network))
sim.pred <- lt(as.matrix(simulate.ergm(ergm.fit)))
sum(flo.truth == sim.pred) / n_dyads
}
# One agreement share per simulated network.
pct_correct_ergm <- replicate(replications, ergm.pred())
ergm.fit$network
# Same simulation through the generic instead of the method.
sim.pred <- lt(as.matrix(simulate(ergm.fit)))
ergm.fit$formula
# Same simulation through the namespace-qualified method.
sim.pred <- lt(as.matrix(ergm::simulate.ergm(ergm.fit)))
# NOTE(review): presumably the formula's left-hand side names an object called
# `school`; confirm against the printed ergm.fit$formula.
school <- ergm.fit$network
# Simulate directly from the stored formula and coefficients.
sim.pred <- lt(as.matrix(simulate.formula(ergm.fit$formula, coef = ergm.fit$coef)))
# Agreement share for this single simulated network.
sum(flo.truth == sim.pred) / n_dyads
ergm.fit$formula
# NOTE(review): this passes a formula to the method written for fitted ergm
# objects; verify that it is supported.
sim.pred <- lt(as.matrix(simulate.ergm(ergm.fit$formula, coef = ergm.fit$coef)))
sim.pred <- lt(as.matrix(simulate.ergm(ergm.fit)))
# Restart with an empty workspace and reload the saved model objects.
rm(list = ls())
load(file = "~/Desktop/89.RData")
load(file = "~/Desktop/fergm.89.RData")
library(fergm)
# Show the source of the packaged function.
print(compare_predictions)
# Mirror the function's arguments and defaults as workspace variables so its
# body can be run line by line.
ergm.fit <- ergm.89
fergm.fit <- fergm.89
set.seed(seed = 12345)
replications <- 10
n_dyads <- choose(n = ergm.fit$network$gal$n, k = 2)
# Return the entries of matrix `m` that lie strictly below its diagonal, in
# column-major order.
lt <- function(m) {
  below_diag <- lower.tri(m, diag = FALSE)
  m[below_diag]
}
# Observed ties: below-diagonal cells of the fitted network's adjacency matrix.
flo.truth <- lt(as.matrix(ergm.fit$network))
# Simulate from the fitted object, then directly from its stored formula.
# NOTE(review): these two calls presumably fail when the network named on the
# formula's left-hand side is absent from the workspace; confirm.
sim.pred <- lt(as.matrix(simulate.ergm(ergm.fit)))
ergm.fit$response
sim.pred <- lt(as.matrix(simulate.formula(ergm.fit$formula, coef = ergm.fit$coef)))
# Take the formula apart: element 1 is the `~` operator, element 2 the
# left-hand side and element 3 the right-hand side. Index 0 is not a valid
# `[[` subscript, and `as.chracter` was a misspelling, so both were dropped.
ergm.fit$formula[1]
ergm.fit$formula[2]
ergm.fit$formula[3]
ergm.fit$formula
ergm.fit$formula[[1]]
ergm.fit$formula[[2]]
as.character(ergm.fit$formula[[2]])
# Point the left-hand side at a network object that exists in the workspace.
nw <- ergm.fit$network
?update.formula
new_formula <- update.formula(ergm.fit$formula, nw ~ .)
new_formula
sim.pred <- lt(as.matrix(simulate.formula(new_formula, coef = ergm.fit$coef)))
compare_predictions
# Empty the workspace and reload the saved model objects (the first three
# statements are repeated exactly as they were typed).
rm(list = ls())
load(file = "~/Desktop/89.RData")
rm(list = ls())
load(file = "~/Desktop/89.RData")
load(file = "~/Desktop/fergm.89.RData")
library("fergm")
#' Compare ERGM and FERGM tie-prediction accuracy
#'
#' Simulates networks from the fitted ERGM and measures, for each one, the
#' share of dyads on which it agrees with the observed network. The same
#' share is computed for every row of the "predictions" stored in the FERGM
#' Stan fit.
#'
#' @param ergm.fit Fitted ERGM providing `$network`, `$formula` and `$coef`.
#' @param fergm.fit Fitted FERGM providing `$stan.dta` (with element `y`) and
#'   `$stan.fit`.
#' @param seed Seed handed to set.seed() before simulating; `NULL` leaves the
#'   random number generator untouched.
#' @param replications Number of networks simulated from the ERGM.
#' @return A matrix with columns `pct_correct_ergm` and `pct_correct_fergm`.
compare_predictions <- function(ergm.fit = NULL, fergm.fit = NULL, seed = 12345, replications = 500) {
  stopifnot(!is.null(ergm.fit), !is.null(fergm.fit))
  # The original accepted `seed` but never applied it.
  if (!is.null(seed)) {
    set.seed(seed)
  }

  lt <- function(m) m[lower.tri(m)]
  n_dyads <- choose(ergm.fit$network$gal$n, 2)

  # The stored formula names a network that may not exist in the session, so
  # its left-hand side is replaced by `nw`. update.formula() keeps the old
  # formula's environment, where a local `nw` is invisible; evaluate the new
  # formula in a child environment that holds `nw` and resolves every other
  # term exactly as before.
  nw <- ergm.fit$network
  new_formula <- update.formula(ergm.fit$formula, nw ~ .)
  formula_env <- environment(ergm.fit$formula)
  if (is.null(formula_env)) {
    formula_env <- parent.frame()
  }
  sim_env <- new.env(parent = formula_env)
  assign("nw", nw, envir = sim_env)
  environment(new_formula) <- sim_env

  # Observed ties do not change between replications, so compute them once.
  flo.truth <- lt(as.matrix(nw))
  ergm.pred <- function() {
    sim.pred <- lt(as.matrix(simulate.formula(new_formula, coef = ergm.fit$coef)))
    sum(flo.truth == sim.pred) / n_dyads
  }
  pct_correct_ergm <- replicate(replications, ergm.pred())

  stan.dta <- fergm.fit$stan.dta
  stan.fit <- fergm.fit$stan.fit
  truth <- stan.dta$y
  predictions <- extract(stan.fit, "predictions")$predictions
  # seq_len() and vapply() stay well-typed even if `predictions` has no rows.
  pct_correct_fergm <- vapply(
    seq_len(nrow(predictions)),
    function(r) sum(truth == predictions[r, ]) / n_dyads,
    numeric(1)
  )

  correct_mat <- cbind(pct_correct_ergm, pct_correct_fergm)
  improvement <- round(((mean(pct_correct_fergm) - mean(pct_correct_ergm)) / mean(pct_correct_ergm)) * 100, 2)
  cat(paste0("The FERGM fit reflects a ", improvement, "% improvement in tie prediction relative to the ERGM across ", replications, " simulations"))
  correct_mat
}
preds <- compare_predictions(ergm.89, fergm.89)
#' Compare ERGM and FERGM tie-prediction accuracy
#'
#' For each network simulated from the fitted ERGM, and for each row of the
#' FERGM "predictions" draws, computes the share of dyads whose tie status
#' matches the observed data.
#'
#' @param ergm.fit Fitted ERGM providing `$network`, `$formula` and `$coef`.
#' @param fergm.fit Fitted FERGM providing `$stan.dta` (with element `y`) and
#'   `$stan.fit`.
#' @param seed Seed handed to set.seed() before simulating; `NULL` leaves the
#'   random number generator untouched.
#' @param replications Number of networks simulated from the ERGM.
#' @return A matrix with columns `pct_correct_ergm` and `pct_correct_fergm`.
compare_predictions <- function(ergm.fit = NULL, fergm.fit = NULL, seed = 12345, replications = 500) {
  stopifnot(!is.null(ergm.fit), !is.null(fergm.fit))
  # Apply the seed; the original signature offered it without using it.
  if (!is.null(seed)) {
    set.seed(seed)
  }

  lt <- function(m) m[lower.tri(m)]
  n_dyads <- choose(ergm.fit$network$gal$n, 2)

  # Replace the formula's left-hand side by the stored network. The updated
  # formula inherits the old formula's environment, so `nw` is placed in a
  # child of that environment to make it resolvable during simulation.
  nw <- ergm.fit$network
  new_formula <- update.formula(ergm.fit$formula, nw ~ .)
  formula_env <- environment(ergm.fit$formula)
  if (is.null(formula_env)) {
    formula_env <- parent.frame()
  }
  sim_env <- new.env(parent = formula_env)
  assign("nw", nw, envir = sim_env)
  environment(new_formula) <- sim_env

  # Observed ties are the same for every replication.
  flo.truth <- lt(as.matrix(nw))
  ergm.pred <- function() {
    sim.pred <- lt(as.matrix(simulate.formula(new_formula, coef = ergm.fit$coef)))
    sum(flo.truth == sim.pred) / n_dyads
  }
  pct_correct_ergm <- replicate(replications, ergm.pred())

  stan.dta <- fergm.fit$stan.dta
  stan.fit <- fergm.fit$stan.fit
  truth <- stan.dta$y
  predictions <- extract(stan.fit, "predictions")$predictions
  # One share per row of `predictions`; safe for a zero-row matrix.
  pct_correct_fergm <- vapply(
    seq_len(nrow(predictions)),
    function(r) sum(truth == predictions[r, ]) / n_dyads,
    numeric(1)
  )

  correct_mat <- cbind(pct_correct_ergm, pct_correct_fergm)
  improvement <- round(((mean(pct_correct_fergm) - mean(pct_correct_ergm)) / mean(pct_correct_ergm)) * 100, 2)
  cat(paste0("The FERGM fit reflects a ", improvement, "% improvement in tie prediction relative to the ERGM across ", replications, " simulations"))
  correct_mat
}
preds <- compare_predictions(ergm.89, fergm.89)
# Step through the body of compare_predictions() by hand: bind its arguments
# to workspace variables first.
ergm.fit <- ergm.89
fergm.fit <- fergm.89
# Values below the diagonal of a matrix.
lt <- function(m) {
  m[lower.tri(m)]
}
# Number of unordered node pairs in the fitted network.
n_dyads <- choose(ergm.fit$network$gal$n, 2)
# Share of dyads on which one simulated network agrees with the observed one.
ergm.pred <- function() {
  flo.truth <- lt(as.matrix(ergm.fit$network))
  nw <- ergm.fit$network # Was looking for a "school" network and couldn't find it
  new_formula <- update.formula(ergm.fit$formula, nw ~ .)
  sim.pred <- lt(as.matrix(simulate.formula(new_formula, coef = ergm.fit$coef)))
  sum(flo.truth == sim.pred) / n_dyads
}
# The original line ended in a stray ")" copied from the function signature,
# which is a syntax error.
replications <- 500
# Reinstall fergm once; the original issued the same install twice in a row.
install.packages("fergm")
# Restart with an empty workspace and reload the saved model objects.
rm(list = ls())
load("~/Desktop/89.RData")
load("~/Desktop/fergm.89.RData")
library(fergm)
# Print the source of the installed function.
compare_predictions
#' Compare ERGM and FERGM tie-prediction accuracy
#'
#' Simulates networks from the fitted ERGM and measures, for each one, the
#' share of dyads on which it agrees with the observed network. The same
#' share is computed for every row of the "predictions" stored in the FERGM
#' Stan fit.
#'
#' @param ergm.fit Fitted ERGM providing `$network`.
#' @param fergm.fit Fitted FERGM providing `$stan.dta` (with element `y`) and
#'   `$stan.fit`.
#' @param seed Optional seed handed to set.seed(); when `NULL` a warning about
#'   replicability is raised instead.
#' @param replications Number of networks simulated from the ERGM.
#' @return A matrix with columns `pct_correct_ergm` and `pct_correct_fergm`.
compare_predictions <- function(ergm.fit = NULL, fergm.fit = NULL, seed = NULL, replications = 500) {
  stopifnot(!is.null(ergm.fit), !is.null(fergm.fit))
  if (is.null(seed)) {
    warning("Note: This function relies upon network simulation to compare ERGM and FERGM predictions.  Consider specifying a seed to set to ensure replicability.")
  } else {
    set.seed(seed)
  }

  lt <- function(m) {
    m[lower.tri(m)]
  }
  n_dyads <- choose(ergm.fit$network$gal$n, 2)

  # Observed ties do not change between replications, so compute them once.
  flo.truth <- lt(as.matrix(ergm.fit$network))
  # NOTE(review): simulating from the fitted object presumably requires the
  # network named in its formula to be available; confirm for saved fits.
  ergm.pred <- function() {
    sim.pred <- lt(as.matrix(simulate.ergm(ergm.fit)))
    sum(flo.truth == sim.pred) / n_dyads
  }
  pct_correct_ergm <- replicate(replications, ergm.pred())

  stan.dta <- fergm.fit$stan.dta
  stan.fit <- fergm.fit$stan.fit
  truth <- stan.dta$y
  predictions <- extract(stan.fit, "predictions")$predictions
  # seq_len() and vapply() stay well-typed even if `predictions` has no rows.
  pct_correct_fergm <- vapply(
    seq_len(nrow(predictions)),
    function(r) sum(truth == predictions[r, ]) / n_dyads,
    numeric(1)
  )

  correct_mat <- cbind(pct_correct_ergm, pct_correct_fergm)
  improvement <- round(((mean(pct_correct_fergm) - mean(pct_correct_ergm)) / mean(pct_correct_ergm)) * 100, 2)
  cat(paste0("The FERGM fit reflects a ", improvement, "% improvement in tie prediction relative to the ERGM across ", replications, " simulations"))
  correct_mat
}
preds <- compare_predictions(ergm.fit = ergm.89, fergm.fit = fergm.89)
# Print the simulate method as found in the ergm namespace.
ergm::simulate.ergm
#' Compare ERGM and FERGM tie-prediction accuracy
#'
#' Variant that calls the simulate method through the ergm namespace. For
#' each simulated network, and for each row of the FERGM "predictions" draws,
#' it computes the share of dyads whose tie status matches the observed data.
#'
#' @param ergm.fit Fitted ERGM providing `$network`.
#' @param fergm.fit Fitted FERGM providing `$stan.dta` (with element `y`) and
#'   `$stan.fit`.
#' @param seed Optional seed handed to set.seed(); when `NULL` a warning about
#'   replicability is raised instead.
#' @param replications Number of networks simulated from the ERGM.
#' @return A matrix with columns `pct_correct_ergm` and `pct_correct_fergm`.
compare_predictions <- function(ergm.fit = NULL, fergm.fit = NULL, seed = NULL, replications = 500) {
  stopifnot(!is.null(ergm.fit), !is.null(fergm.fit))
  if (is.null(seed)) {
    warning("Note: This function relies upon network simulation to compare ERGM and FERGM predictions.  Consider specifying a seed to set to ensure replicability.")
  } else {
    set.seed(seed)
  }

  lt <- function(m) {
    m[lower.tri(m)]
  }
  n_dyads <- choose(ergm.fit$network$gal$n, 2)

  # Observed ties are the same for every replication.
  flo.truth <- lt(as.matrix(ergm.fit$network))
  ergm.pred <- function() {
    sim.pred <- lt(as.matrix(ergm::simulate.ergm(ergm.fit)))
    sum(flo.truth == sim.pred) / n_dyads
  }
  pct_correct_ergm <- replicate(replications, ergm.pred())

  stan.dta <- fergm.fit$stan.dta
  stan.fit <- fergm.fit$stan.fit
  truth <- stan.dta$y
  predictions <- extract(stan.fit, "predictions")$predictions
  # One share per row of `predictions`; safe for a zero-row matrix.
  pct_correct_fergm <- vapply(
    seq_len(nrow(predictions)),
    function(r) sum(truth == predictions[r, ]) / n_dyads,
    numeric(1)
  )

  correct_mat <- cbind(pct_correct_ergm, pct_correct_fergm)
  improvement <- round(((mean(pct_correct_fergm) - mean(pct_correct_ergm)) / mean(pct_correct_ergm)) * 100, 2)
  cat(paste0("The FERGM fit reflects a ", improvement, "% improvement in tie prediction relative to the ERGM across ", replications, " simulations"))
  correct_mat
}
preds <- compare_predictions(ergm.fit = ergm.89, fergm.fit = fergm.89)
# Record the R version and attached packages, then restart with an empty
# workspace and reload the saved model objects.
print(sessionInfo())
rm(list = ls())
load(file = "~/Desktop/89.RData")
load(file = "~/Desktop/fergm.89.RData")
library("fergm")
#' Compare ERGM and FERGM tie-prediction accuracy
#'
#' Simulates networks from the fitted ERGM and measures, for each one, the
#' share of dyads on which it agrees with the observed network. The same
#' share is computed for every row of the "predictions" stored in the FERGM
#' Stan fit.
#'
#' @param ergm.fit Fitted ERGM providing `$network`, `$formula` and `$coef`.
#' @param fergm.fit Fitted FERGM providing `$stan.dta` (with element `y`) and
#'   `$stan.fit`.
#' @param seed Optional seed handed to set.seed(); when `NULL` a warning about
#'   replicability is raised instead.
#' @param replications Number of networks simulated from the ERGM.
#' @return A matrix with columns `pct_correct_ergm` and `pct_correct_fergm`.
compare_predictions <- function(ergm.fit = NULL, fergm.fit = NULL, seed = NULL, replications = 500) {
  stopifnot(!is.null(ergm.fit), !is.null(fergm.fit))
  if (is.null(seed)) {
    warning("Note: This function relies upon network simulation to compare ERGM and FERGM predictions.  Consider specifying a seed to set to ensure replicability.")
  } else {
    set.seed(seed)
  }

  lt <- function(m) m[lower.tri(m)]
  n_dyads <- choose(ergm.fit$network$gal$n, 2)

  # Replace the formula's left-hand side by the stored network. The result of
  # update.formula() keeps the old formula's environment, where this local
  # `nw` cannot be seen, so the formula is re-homed in a child environment
  # that contains `nw` and otherwise resolves names as before.
  nw <- ergm.fit$network
  new_formula <- update.formula(ergm.fit$formula, nw ~ .)
  formula_env <- environment(ergm.fit$formula)
  if (is.null(formula_env)) {
    formula_env <- parent.frame()
  }
  sim_env <- new.env(parent = formula_env)
  assign("nw", nw, envir = sim_env)
  environment(new_formula) <- sim_env
  ergm_coefs <- ergm.fit$coef

  # Observed ties do not change between replications, so compute them once.
  flo.truth <- lt(as.matrix(nw))
  ergm.pred <- function() {
    sim.pred <- lt(as.matrix(simulate.formula(object = new_formula, coef = ergm_coefs)))
    sum(flo.truth == sim.pred) / n_dyads
  }
  pct_correct_ergm <- replicate(replications, ergm.pred())

  stan.dta <- fergm.fit$stan.dta
  stan.fit <- fergm.fit$stan.fit
  truth <- stan.dta$y
  predictions <- extract(stan.fit, "predictions")$predictions
  # seq_len() and vapply() stay well-typed even if `predictions` has no rows.
  pct_correct_fergm <- vapply(
    seq_len(nrow(predictions)),
    function(r) sum(truth == predictions[r, ]) / n_dyads,
    numeric(1)
  )

  correct_mat <- cbind(pct_correct_ergm, pct_correct_fergm)
  improvement <- round(((mean(pct_correct_fergm) - mean(pct_correct_ergm)) / mean(pct_correct_ergm)) * 100, 2)
  cat(paste0("The FERGM fit reflects a ", improvement, "% improvement in tie prediction relative to the ERGM across ", replications, " simulations"))
  correct_mat
}
preds <- compare_predictions(ergm.fit = ergm.89, fergm.fit = fergm.89, replications = 10)
# Install smcure only when it is missing, then attach it and its example data.
if (!requireNamespace("smcure", quietly = TRUE)) {
  install.packages("smcure")
}
library(smcure)
data(e1684)
# Mixture cure model with the same covariates (TRT, SEX, AGE) in both the
# survival formula and the cure formula.
pd <- smcure(Surv(FAILTIME, FAILCENS) ~ TRT + SEX + AGE,
  cureform = ~ TRT + SEX + AGE,
  data = e1684, model = "ph", Var = FALSE
)
printsmcure(pd, Var = FALSE)
# Predictions for two covariate profiles that differ only in the first
# column. The first attempt nested the plotting call inside predictsmcure()
# in place of `model = "ph"`; predict first, then plot.
predm <- predictsmcure(pd,
  newX = cbind(c(1, 0), c(0, 0), c(0.579, 0.579)),
  newZ = cbind(c(1, 0), c(0, 0), c(0.579, 0.579)),
  model = "ph"
)
plotpredictsmcure(predm, model = "ph")
# Look at the first rows of the example data.
head(e1684)
# Refit with SEX wrapped in strata() in both formulas, then print, predict
# and plot as before.
pd <- smcure(
  Surv(FAILTIME, FAILCENS) ~ TRT + strata(SEX) + AGE,
  cureform = ~ TRT + strata(SEX) + AGE,
  data = e1684,
  model = "ph",
  Var = FALSE
)
printsmcure(pd, Var = FALSE)
predm <- predictsmcure(
  pd,
  newX = cbind(c(1, 0), c(0, 0), c(0.579, 0.579)),
  newZ = cbind(c(1, 0), c(0, 0), c(0.579, 0.579)),
  model = "ph"
)
plotpredictsmcure(predm, model = "ph")
# Attach survival up front so frailty() and its help page can be found.
library(survival)
?frailty
# NOTE(review): whether smcure() accepts frailty() terms is not visible here;
# the fits below are experiments and should be checked against ?smcure.
# frailty(SEX) in both formulas.
pd <- smcure(Surv(FAILTIME, FAILCENS) ~ TRT + frailty(SEX) + AGE,
  cureform = ~ TRT + frailty(SEX) + AGE,
  data = e1684, model = "ph", Var = FALSE
)
# frailty(SEX) in the survival formula only. One attempt wrote `sex` in
# lowercase, but the column is called SEX and R names are case-sensitive.
pd <- smcure(Surv(FAILTIME, FAILCENS) ~ TRT + frailty(SEX) + AGE,
  cureform = ~ TRT + SEX + AGE,
  data = e1684, model = "ph", Var = FALSE
)
# Add a letter label to every row. `letters` has 26 elements, so it is
# recycled explicitly to the number of rows.
e1684$letters <- rep_len(letters, nrow(e1684))
?smcure
frailty(e1684$TRT)
# frailty(SEX) in the cure formula only.
pd <- smcure(Surv(FAILTIME, FAILCENS) ~ TRT + SEX + AGE,
  cureform = ~ TRT + frailty(SEX) + AGE,
  data = e1684, model = "ph", Var = FALSE
)
pd$s
############################
#
# RUN FILE
# Journal of Substance
# Abuse Treatment Archive
# B. W. Campbell
# Last Updated:  11/05/2018
#
############################
rm(list = ls())
# will need to adjust local working directory, this is BWC's
wd <- "~/Dropbox/Projects/Projects_Skyler/NetworkTCs/TNAM_pilot_data/survivalPaper/ReplicationArchive"
setwd(wd)
# install whichever of the required packages are not yet present
needed <- setdiff(
  x = c("stringr", "plyr", "igraph", "survival", "lubridate", "tnam", "ggplot2", "devtools"),
  y = rownames(installed.packages())
)
if (length(needed) > 0L) {
  install.packages(needed, repos = "https://cloud.r-project.org/", dependencies = TRUE)
}
# pin riskRegression to a specific release: from source first, then as a binary
devtools::install_version("riskRegression", version = "2018.04.21", repos = "http://cran.us.r-project.org", type = "source")
devtools::install_version("riskRegression", version = "2018.04.21", repos = "http://cran.us.r-project.org", type = "binary")
help("install_version")
# same check again, now with riskRegression in the list of required packages
needed <- setdiff(
  x = c("stringr", "plyr", "igraph", "survival", "lubridate", "tnam", "ggplot2", "devtools", "riskRegression"),
  y = rownames(installed.packages())
)
if (length(needed) > 0L) {
  install.packages(needed, repos = "https://cloud.r-project.org/", dependencies = TRUE)
}
write.data <- FALSE
############################
#
# RUN FILE
# Journal of Substance
# Abuse Treatment Archive
# B. W. Campbell
# Last Updated:  11/05/2018
#
############################
rm(list = ls())
# will need to adjust local working directory, this is BWC's
wd <- "~/Dropbox/Projects/Projects_Skyler/NetworkTCs/TNAM_pilot_data/survivalPaper/ReplicationArchive"
setwd(wd)
# install necessary packages (only those that are not installed yet)
needed <- setdiff(
  c("stringr", "plyr", "igraph", "survival", "lubridate", "tnam", "ggplot2", "devtools", "riskRegression"),
  installed.packages()[, "Package"]
)
if (length(needed) > 0) {
  install.packages(needed, repos = "https://cloud.r-project.org/", dependencies = TRUE)
}
# Set to FALSE if the data file (CCDW_SurvivalData.RData) has already been written
# (presumably read by the sourced script; confirm there)
write.data <- FALSE
# run code to reproduce display items
source("CCDW_ReplicationArchive.R", print.eval = TRUE)
# Save the session details. A bare sessionInfo() between sink() calls writes
# nothing when this file is itself run through source(), because top-level
# values are not auto-printed there. Capturing an explicit print() works in
# every execution mode and cannot leave a sink open.
writeLines(capture.output(print(sessionInfo())), "CCDW_SessionInfo.txt")
